import re
import xml.etree.ElementTree as ET
from xml.sax.saxutils import escape

from home.models import Image
def getArticles(figureID):
    """Build the HTML rendering of the article that contains a figure.

    Looks up the ``Image`` whose ``id`` equals ``figureID``, reads its
    ``label`` and the ``article_file_location`` of its ``article``, and
    renders that file with ``getHTMLString``.

    Returns a dict with two keys:
        'isSuccessful' -- always True when this function returns.
        'article'      -- the HTML string produced by ``getHTMLString``.

    No failure is caught here: if no image matches, indexing the empty
    result is expected to raise IndexError, and file / XML errors from
    ``getHTMLString`` propagate to the caller.
    """
    # Index the query result exactly once. NOTE(review): Image is presumably
    # a Django model, in which case every ``queryset[0]`` on an unevaluated
    # queryset issues its own database query -- confirm.
    currImage = Image.objects.filter(id=figureID)[0]

    result = {
        'isSuccessful': True,
        'article': getHTMLString(currImage.label,
                                 currImage.article.article_file_location),
    }
    # Debug output kept from the original; the parenthesised form prints the
    # same text on Python 2 and is also valid syntax on Python 3.
    print(result['article'])
    return result

def getHTMLString(figNumber, filePath):
    """Parse the XML file at ``filePath`` and return it rendered as HTML.

    ``figNumber`` is passed through unchanged to ``getHTMLContent``.
    Errors from opening the file or from the XML parser are not caught.
    """
    # Binary mode hands the raw bytes to the XML parser so that it, not the
    # platform's default text decoding, honours the document's declared
    # encoding. The ``with`` block closes the handle even if parsing fails.
    with open(filePath, 'rb') as inFile:
        doc = ET.parse(inFile)
    #highLightRelevantSentence(figNumber, resultStr)
    return getHTMLContent(figNumber, doc)

def getHTMLContent(figNumber, doc):
    """Assemble the full HTML for ``doc``.

    The pieces are produced, in this order, by ``getTitle``, ``getAuthor``
    (wrapped in a ``<p id="author">`` paragraph), ``getAbstract`` and
    ``getMain``; ``figNumber`` is only forwarded to ``getMain``.
    """
    pieces = [
        getTitle(doc),
        "<p id=\"author\">" + getAuthor(doc) + "</p>\n",
        getAbstract(doc),
        getMain(figNumber, doc),
    ]
    return ''.join(pieces)

def getTitle(doc):
    """Return every article title in ``doc`` as an ``<h2>`` heading.

    ``doc`` is a parsed ElementTree; titles are the elements found at
    ``front/article-meta/title-group/article-title``. Each heading is
    preceded by a newline. Returns '' when no title element exists.
    """
    headings = []
    for node in doc.findall("front/article-meta/title-group/article-title"):
        # itertext() instead of .text: .text is None when the title is empty
        # or starts with inline markup (e.g. <italic>), and it never includes
        # the text that follows a nested element.
        titleText = ''.join(node.itertext())
        # The parser has already decoded entities, so re-escape &, < and >
        # before embedding the text in HTML.
        headings.append("\n<h2>" + escape(titleText) + "</h2>")
    return ''.join(headings)

def getAuthor(doc):
    """Return the contributors of ``doc`` as one comma-separated string.

    Every ``front/article-meta/contrib-group/contrib/name`` element yields
    one entry of the form "<surname> <given-names>"; entries are joined
    with ", ". Returns '' when there are no name elements.
    """
    authors = []
    # Walk the <name> elements themselves rather than two parallel lists of
    # surnames and given names: a contributor with only one of the two parts
    # would otherwise shift every later pairing or raise IndexError.
    for nameNode in doc.findall("front/article-meta/contrib-group/contrib/name"):
        nameParts = []
        for tag in ("surname", "given-names"):
            partNode = nameNode.find(tag)
            if partNode is None:
                continue
            partText = ''.join(partNode.itertext()).strip()
            if partText:
                # Parsed text is entity-decoded; escape it for HTML output.
                nameParts.append(escape(partText))
        if nameParts:
            authors.append(" ".join(nameParts))
    return ", ".join(authors)

def getAbstract(doc):
    """Return the abstract of ``doc`` wrapped in a ``div.abstract`` element.

    The first ``front/article-meta/abstract/title`` (if it has any text)
    becomes an ``<h4>`` heading, followed by the serialized XML of every
    ``front/article-meta/abstract/p`` element. The wrapping ``<div>`` is
    emitted even when the document has no abstract.
    """
    parts = ["<div class = \"abstract\" >\n"]

    # The title is optional: find() returns None when it is absent, and its
    # .text is None when it is empty or begins with inline markup.
    titleNode = doc.find("front/article-meta/abstract/title")
    if titleNode is not None:
        titleText = ''.join(titleNode.itertext())
        if titleText.strip():
            parts.append("<h4>" + escape(titleText) + "</h4>\n")

    for p in doc.findall("front/article-meta/abstract/p"):
        fragment = ET.tostring(p)
        # tostring() returns bytes on Python 3 (str on Python 2); its default
        # encoding is us-ascii, so decoding as ASCII is lossless.
        if not isinstance(fragment, str):
            fragment = fragment.decode('ascii')
        parts.append(fragment)

    parts.append("</div>\n")
    return ''.join(parts)

def getMain(figureNumber, doc):
    """Render the ``<sec>`` children of ``<body>`` as HTML.

    Inside each top-level ``<sec>``, a ``<title>`` child becomes an ``<h4>``
    heading and every other child is serialized and wrapped in
    ``<div class = "main">``. A child whose serialized XML mentions the
    requested figure as "Fig. <n>" gets the class "main highlight" instead.

    figureNumber -- the figure's label string; the figure number is taken
        from the digits at its end (presumably labels look like "Figure 3"
        -- confirm against the stored labels). Without trailing digits the
        last character is used; an empty label highlights nothing.
    doc -- parsed ElementTree of the article.

    Children of ``<body>`` that are not ``<sec>`` are skipped. Returns ''
    when the document has no ``<body>``.
    """
    body = doc.find("body")
    if body is None:
        return ''

    # Build the matcher once, outside the loops. Use the whole trailing
    # number (not just the last character, which turned "Figure 12" into
    # "Fig. 2") and refuse a following digit so that "Fig. 1" does not also
    # match "Fig. 10".
    figurePattern = None
    if figureNumber:
        trailingDigits = re.search(r'\d+$', figureNumber)
        if trailingDigits:
            figureId = trailingDigits.group()
        else:
            figureId = figureNumber[-1]
        figurePattern = re.compile(re.escape('Fig. ' + figureId) + r'(?!\d)')

    parts = []
    # Iterate elements directly; Element.getchildren() no longer exists on
    # Python 3.9+.
    for section in body:
        if section.tag != "sec":
            continue
        for child in section:
            if child.tag == "title":
                # itertext() copes with empty titles and inline markup,
                # where .text would be None or incomplete.
                heading = ''.join(child.itertext())
                parts.append("<h4>" + escape(heading) + "</h4>")
                continue

            # Serialize once and reuse for both the match and the output.
            fragment = ET.tostring(child)
            # tostring() returns ASCII-encoded bytes on Python 3.
            if not isinstance(fragment, str):
                fragment = fragment.decode('ascii')

            if figurePattern is not None and figurePattern.search(fragment):
                parts.append('<div class = \"main highlight\">' + fragment + '</div>')
            else:
                parts.append("<div class = \"main\">" + fragment + "</div>")
    return ''.join(parts)



